This document describes the RINKO data analysis and visualization.

Read data

#load packages
library(dplyr)
library(ggplot2)
library(readxl)

The names of the columns need to be redefined.

# Read the RINKO measurements from the CSV file (the file name indicates that
# outliers were already removed upstream).
rinkototal <- read.csv(
  file = "~/Documents/GitHub/DNA_monitOki/Environmental parameters/rinko_all_outliers_removed.csv"
)

# Drop the columns that are not analysed, by position: 1-2, 6-7 and 18-21.
less_col <- rinkototal[, -c(1:2, 6:7, 18:21)]

# Mean and standard deviation of every numeric column for each
# date / location / sample combination, ignoring missing values.
# Output columns are named "<column>_Mean" and "<column>_SD".
#
# - `where(is.numeric)`: bare predicates in a selection are deprecated.
# - `na.rm = TRUE` is passed inside explicit functions because forwarding
#   extra arguments through across() is deprecated.
# - `.groups = "drop"` returns an ungrouped table, so the later steps do not
#   inherit a leftover date/location grouping.
mean_sd_rinko <- less_col %>%
  group_by(date, location, sample) %>%
  summarise(
    across(
      .cols = where(is.numeric),
      .fns = list(
        Mean = function(x) mean(x, na.rm = TRUE),
        SD = function(x) sd(x, na.rm = TRUE)
      ),
      .names = "{col}_{fn}"
    ),
    .groups = "drop"
  )

# Substitute the site codes in the location column, one code at a time.
# NOTE(review): each pattern is replaced by the identical string, so this pass
# currently leaves the values unchanged -- confirm the intended spellings.
mean_sd_rinko$location <- Reduce(
  function(values, code) gsub(code, code, values),
  c("U2", "U1", "R2", "R1"),
  mean_sd_rinko$location
)

# Same substitution pass for the sample column, one sample code at a time.
# NOTE(review): each pattern is replaced by the identical string, so this pass
# currently leaves the values unchanged -- confirm the intended spellings.
mean_sd_rinko$sample <- Reduce(
  function(values, code) gsub(code, code, values),
  c(
    "U2S", "U2N", "U2C",
    "U1S", "U1N", "U1C",
    "R2S", "R2N", "R2C",
    "R1S", "R1N", "R1C"
  ),
  mean_sd_rinko$sample
)

# Add a Position column holding the last character of the sample name when it
# is C, S or N; any other ending gives NA.
mean_sd_rinko <- mutate(
  mean_sd_rinko,
  Position = {
    last_char <- substr(sample, nchar(sample), nchar(sample))
    if_else(last_char %in% c("C", "S", "N"), last_char, NA_character_)
  }
)

# Move Position so that it sits directly after the sample column; every other
# column keeps its relative order. Selecting by name (instead of the fixed
# indices 1, 2, 3, 24, 4:23) keeps this correct if the number of summary
# columns changes.
mean_sd_rinko <- mean_sd_rinko %>%
  relocate(Position, .after = sample)

EDA main variables

Water temp

# Water temperature by date, one panel per location, with points coloured by
# position so that differences between positions are visible.
# Only one theme() call is kept: the former text size of 12 was always
# overridden by the size of 10 set afterwards.
Poscol <- c("#264653", "#8AB17D", "#E76F51")
temp_pos <- ggplot(
  mean_sd_rinko,
  aes(x = date, y = water_temp_Mean, color = Position)
) +
  facet_wrap(~ location) +
  geom_point(size = 3, alpha = 0.8) +
  scale_color_manual(values = Poscol) +
  xlab("Date") +
  ylab("Water temperature (°C)") +
  theme_bw() +
  theme(
    text = element_text(size = 10),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

temp_pos

# Combine the positions of each location: one row per date and location.
# `.groups = "drop"` returns an ungrouped table and avoids the regrouping
# message from summarise().
# NOTE(review): `sd` is the standard deviation of the per-sample SD values,
# not of the per-sample means, and the plot below does not use it -- confirm
# that this is the intended statistic.
temp_mean_pos <- mean_sd_rinko %>%
  group_by(date, location) %>%
  summarise(
    Mean = mean(water_temp_Mean),
    sd = sd(water_temp_SD),
    .groups = "drop"
  )

# Mean water temperature over time, one coloured line per location.
areacol <- c("#264653", "#2A9D8F", "#E76F51", "#F4A261")
all_temp <- ggplot(
  temp_mean_pos,
  aes(x = date, y = Mean, color = location, group = location)
) +
  geom_point() +
  geom_line() +
  scale_colour_manual(values = areacol) +
  xlab("Date") +
  ylab("Water temperature (°C)") +
  theme_bw() +
  theme(
    text = element_text(size = 10),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

all_temp

Salinity

# Salinity by date, one panel per location, with points coloured by position
# so that differences between positions are visible.
# Only one theme() call is kept: the former text size of 12 was always
# overridden by the size of 10 set afterwards.
sal_pos <- ggplot(
  mean_sd_rinko,
  aes(x = date, y = salinity_Mean, color = Position)
) +
  facet_wrap(~ location) +
  geom_point(size = 3, alpha = 0.8) +
  scale_color_manual(values = Poscol) +
  xlab("Date") +
  ylab("Salinity") +
  theme_bw() +
  theme(
    text = element_text(size = 10),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

sal_pos

# Combine the positions of each location: one row per date and location.
# `.groups = "drop"` returns an ungrouped table and avoids the regrouping
# message from summarise().
# NOTE(review): `sd` is the standard deviation of the per-sample SD values,
# not of the per-sample means, and the plot below does not use it -- confirm
# that this is the intended statistic.
sal_mean_pos <- mean_sd_rinko %>%
  group_by(date, location) %>%
  summarise(
    Mean = mean(salinity_Mean),
    sd = sd(salinity_SD),
    .groups = "drop"
  )

# Mean salinity over time, one coloured line per location.
all_sal <- ggplot(
  sal_mean_pos,
  aes(x = date, y = Mean, color = location, group = location)
) +
  geom_point() +
  geom_line() +
  scale_colour_manual(values = areacol) +
  xlab("Date") +
  ylab("Salinity") +
  theme_bw() +
  theme(
    text = element_text(size = 10),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

all_sal

Turbidity

# Turbidity by date, one panel per location, with points coloured by position
# so that differences between positions are visible.
# Only one theme() call is kept: the former text size of 12 was always
# overridden by the size of 10 set afterwards.
turb_pos <- ggplot(
  mean_sd_rinko,
  aes(x = date, y = tur_range_Mean, color = Position)
) +
  facet_wrap(~ location) +
  geom_point(size = 3, alpha = 0.8) +
  scale_color_manual(values = Poscol) +
  xlab("Date") +
  ylab("Turbidity") +
  theme_bw() +
  theme(
    text = element_text(size = 10),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

turb_pos

# Combine the positions of each location: one row per date and location.
# `.groups = "drop"` returns an ungrouped table and avoids the regrouping
# message from summarise().
# NOTE(review): `sd` is the standard deviation of the per-sample SD values,
# not of the per-sample means, and the plot below does not use it -- confirm
# that this is the intended statistic.
turb_mean_pos <- mean_sd_rinko %>%
  group_by(date, location) %>%
  summarise(
    Mean = mean(tur_range_Mean),
    sd = sd(tur_range_SD),
    .groups = "drop"
  )

# Mean turbidity over time, one coloured line per location.
all_turb <- ggplot(
  turb_mean_pos,
  aes(x = date, y = Mean, color = location, group = location)
) +
  geom_point() +
  geom_line() +
  scale_colour_manual(values = areacol) +
  xlab("Date") +
  ylab("Turbidity") +
  theme_bw() +
  theme(
    text = element_text(size = 10),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

all_turb

Dissolved Oxygen (DO)

# Dissolved oxygen by date, one panel per location, with points coloured by
# position so that differences between positions are visible.
# Only one theme() call is kept: the former text size of 12 was always
# overridden by the size of 10 set afterwards.
DO_pos <- ggplot(
  mean_sd_rinko,
  aes(x = date, y = do_Mean, color = Position)
) +
  facet_wrap(~ location) +
  geom_point(size = 3, alpha = 0.8) +
  scale_color_manual(values = Poscol) +
  xlab("Date") +
  ylab("DO") +
  theme_bw() +
  theme(
    text = element_text(size = 10),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

DO_pos

# Combine the positions of each location: one row per date and location.
# `.groups = "drop"` returns an ungrouped table and avoids the regrouping
# message from summarise().
# NOTE(review): `sd` is the standard deviation of the per-sample SD values,
# not of the per-sample means, and the plot below does not use it -- confirm
# that this is the intended statistic.
do_mean_pos <- mean_sd_rinko %>%
  group_by(date, location) %>%
  summarise(
    Mean = mean(do_Mean),
    sd = sd(do_SD),
    .groups = "drop"
  )

# Mean dissolved oxygen over time, one coloured line per location.
all_DO <- ggplot(
  do_mean_pos,
  aes(x = date, y = Mean, color = location, group = location)
) +
  geom_point() +
  geom_line() +
  scale_colour_manual(values = areacol) +
  xlab("Date") +
  ylab("DO") +
  theme_bw() +
  theme(
    text = element_text(size = 10),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

all_DO

Chlorophyll f

# Chlorophyll f by date, one panel per location, with points coloured by
# position so that differences between positions are visible.
# Only one theme() call is kept: the former text size of 12 was always
# overridden by the size of 10 set afterwards.
ChlF_pos <- ggplot(
  mean_sd_rinko,
  aes(x = date, y = chl_f_Mean, color = Position)
) +
  facet_wrap(~ location) +
  geom_point(size = 3, alpha = 0.8) +
  scale_color_manual(values = Poscol) +
  xlab("Date") +
  ylab("Chlorophyll f") +
  theme_bw() +
  theme(
    text = element_text(size = 10),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

ChlF_pos

# Combine the positions of each location: one row per date and location.
# `.groups = "drop"` returns an ungrouped table and avoids the regrouping
# message from summarise().
# NOTE(review): `sd` is the standard deviation of the per-sample SD values,
# not of the per-sample means, and the plot below does not use it -- confirm
# that this is the intended statistic.
cf_mean_pos <- mean_sd_rinko %>%
  group_by(date, location) %>%
  summarise(
    Mean = mean(chl_f_Mean),
    sd = sd(chl_f_SD),
    .groups = "drop"
  )

# Mean chlorophyll f over time, one coloured line per location.
all_cf <- ggplot(
  cf_mean_pos,
  aes(x = date, y = Mean, color = location, group = location)
) +
  geom_point() +
  geom_line() +
  scale_colour_manual(values = areacol) +
  xlab("Date") +
  ylab("Chlorophyll f") +
  theme_bw() +
  theme(
    text = element_text(size = 10),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

all_cf

Chlorophyll a. The values are exactly the same as those of chlorophyll f; this needs to be investigated further.

# Chlorophyll a by date, one panel per location, with points coloured by
# position so that differences between positions are visible.
# Only one theme() call is kept: the former text size of 12 was always
# overridden by the size of 10 set afterwards.
ChlA_pos <- ggplot(
  mean_sd_rinko,
  aes(x = date, y = chl_a_Mean, color = Position)
) +
  facet_wrap(~ location) +
  geom_point(size = 3, alpha = 0.8) +
  scale_color_manual(values = Poscol) +
  xlab("Date") +
  ylab("Chlorophyll a") +
  theme_bw() +
  theme(
    text = element_text(size = 10),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

ChlA_pos

# Combine the positions of each location: one row per date and location.
# `.groups = "drop"` returns an ungrouped table and avoids the regrouping
# message from summarise().
# NOTE(review): `sd` is the standard deviation of the per-sample SD values,
# not of the per-sample means, and the plot below does not use it -- confirm
# that this is the intended statistic.
ca_mean_pos <- mean_sd_rinko %>%
  group_by(date, location) %>%
  summarise(
    Mean = mean(chl_a_Mean),
    sd = sd(chl_a_SD),
    .groups = "drop"
  )

# Mean chlorophyll a over time, one coloured line per location.
all_ca <- ggplot(
  ca_mean_pos,
  aes(x = date, y = Mean, color = location, group = location)
) +
  geom_point() +
  geom_line() +
  scale_colour_manual(values = areacol) +
  xlab("Date") +
  ylab("Chlorophyll a") +
  theme_bw() +
  theme(
    text = element_text(size = 10),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

all_ca

Other variables

Conductivity. Usually correlated with temperature: as the temperature rises, the viscosity of water decreases, which increases the mobility of the ions in the water. An increase in temperature therefore increases conductivity.

# Conductivity by date, one panel per location, with points coloured by
# position so that differences between positions are visible.
# Only one theme() call is kept: the former text size of 12 was always
# overridden by the size of 10 set afterwards.
Cond_pos <- ggplot(
  mean_sd_rinko,
  aes(x = date, y = conductivity_Mean, color = Position)
) +
  facet_wrap(~ location) +
  geom_point(size = 3, alpha = 0.8) +
  scale_color_manual(values = Poscol) +
  xlab("Date") +
  ylab("Conductivity") +
  theme_bw() +
  theme(
    text = element_text(size = 10),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

Cond_pos

# Combine the positions of each location: one row per date and location.
# `.groups = "drop"` returns an ungrouped table and avoids the regrouping
# message from summarise().
# NOTE(review): `sd` is the standard deviation of the per-sample SD values,
# not of the per-sample means, and the plot below does not use it -- confirm
# that this is the intended statistic.
Cond_mean_pos <- mean_sd_rinko %>%
  group_by(date, location) %>%
  summarise(
    Mean = mean(conductivity_Mean),
    sd = sd(conductivity_SD),
    .groups = "drop"
  )

# Mean conductivity over time, one coloured line per location.
all_Cond <- ggplot(
  Cond_mean_pos,
  aes(x = date, y = Mean, color = location, group = location)
) +
  geom_point() +
  geom_line() +
  scale_colour_manual(values = areacol) +
  xlab("Date") +
  ylab("Conductivity") +
  theme_bw() +
  theme(
    text = element_text(size = 10),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

all_Cond

Density. Usually inversely correlated with temperature, but it also depends on salinity. In this case, the central points of the urban areas show low density levels because of the large volume of freshwater carried by the main stream.

# Density by date, one panel per location, with points coloured by position
# so that differences between positions are visible.
# Only one theme() call is kept: the former text size of 12 was always
# overridden by the size of 10 set afterwards.
Dens_pos <- ggplot(
  mean_sd_rinko,
  aes(x = date, y = density_Mean, color = Position)
) +
  facet_wrap(~ location) +
  geom_point(size = 3, alpha = 0.8) +
  scale_color_manual(values = Poscol) +
  xlab("Date") +
  ylab("Density") +
  theme_bw() +
  theme(
    text = element_text(size = 10),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

Dens_pos

# Combine the positions of each location: one row per date and location.
# `.groups = "drop"` returns an ungrouped table and avoids the regrouping
# message from summarise().
# NOTE(review): `sd` is the standard deviation of the per-sample SD values,
# not of the per-sample means, and the plot below does not use it -- confirm
# that this is the intended statistic.
Dens_mean_pos <- mean_sd_rinko %>%
  group_by(date, location) %>%
  summarise(
    Mean = mean(density_Mean),
    sd = sd(density_SD),
    .groups = "drop"
  )

# Mean density over time, one coloured line per location.
all_Dens <- ggplot(
  Dens_mean_pos,
  aes(x = date, y = Mean, color = location, group = location)
) +
  geom_point() +
  geom_line() +
  scale_colour_manual(values = areacol) +
  xlab("Date") +
  ylab("Density") +
  theme_bw() +
  theme(
    text = element_text(size = 10),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

all_Dens